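# merge.R
# Outputs: data/master.dta and data/master.csv (party-level master dataset),
#  data/master_elec.csv and data/master_elec.dta (election-level dataset)
# Dependencies: depends on partylist.xlsx and various third-party datasets
#  (see paper and code)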


library(dplyr)
library(stringr)
library(tidyr)
library(foreign)
library(lubridate)
library(readxl)
library(countrycode)
library(readr)

# All paths below are relative ('data/...'), so run this script with the
# project root as the working directory (e.g. `Rscript merge.R` from that
# directory, or an RStudio project). The former `setwd('')` call was removed:
# an empty path is not a directory setwd() can change to, so the call stopped
# the script before any data were read.

# ------------------------------------------------------------------------------
# Data
# ------------------------------------------------------------------------------

# Country coverage. The two regional vectors are concatenated into `europe`,
# which is used below to restrict every source to the countries of interest.
westerneurope <- c(
  "Austria", "Belgium", "Switzerland", "Cyprus", "Germany", "Denmark",
  "Spain", "Finland", "France", "United Kingdom", "Greece", "Ireland",
  "Iceland", "Italy", "Luxembourg", "Malta", "Netherlands", "Norway",
  "Portugal", "Sweden"
)

easterneurope <- c(
  "Bulgaria", "Czech Republic", "Estonia", "Croatia", "Hungary", "Lithuania",
  "Latvia", "Poland", "Romania", "Slovakia", "Slovenia"
)

# Western countries first, then Eastern ones
europe <- c(westerneurope, easterneurope)

# ParlGov party specific election results data ---------------------------------
# Read the ParlGov election view as a tibble; strings stay character.
p <- as_tibble(
  read.csv("data/parlgov/view_election.csv", stringsAsFactors = FALSE)
)

# old
# make a party level data set
# tmp <- p %>% filter(ymd(election_date) > '1980-01-01', vote_share > 2,
#                     country_name %in% unique(c$country)) %>%
#   group_by(party_id) %>%
#   summarise(country_name = unique(country_name),
#             party_name_short = unique(party_name_short),
#             party_name_english = unique(party_name_english),
#             elections = paste(election_date, collapse = ';')) %>%
#   arrange(country_name, party_name_short) %>%
#   mutate(populist = '')
#
# write.xlsx(tmp, 'data/partylist.xlsx')

# # new
# # make a party level data set
# tmp <- p %>% filter(ymd(election_date) > '1970-01-01', vote_share > 1,
#                     country_name %in% europe) %>%
#   group_by(party_id) %>%
#   summarise(country_name = unique(country_name),
#             party_name_short = unique(party_name_short),
#             party_name_english = unique(party_name_english),
#             elections = paste(election_date, collapse = ';')) %>%
#   arrange(country_name, party_name_short) %>%
#   mutate(populist = '')
#
# write.xlsx(tmp, 'data/partylist_new.xlsx')

# Derive the election year from the election date, then keep only the covered
# countries and parties with a known vote share above 1.
p <- p %>%
  mutate(election_year = year(election_date)) %>%
  filter(country_name %in% europe, # election_year >= 1970,
         !is.na(vote_share),
         vote_share > 1)

# Add info which parties are populist ------------------------------------------

# Party list with the populism coding: first sheet, first seven columns only.
# The populism columns are read as character so that mixed cell types in the
# spreadsheet do not matter.
pl <- read_xlsx("data/partylist.xlsx", sheet = 1, range = cell_cols(1:7)) %>%
  select(party_id, populism, `Populism Source`) %>%
  mutate_at(vars(contains("pop")), as.character)

# Attach the coding to the party-election rows, give the source column a
# syntactic name and turn the populism code into an integer.
p <- p %>%
  left_join(pl, by = "party_id") %>%
  rename(populism_source = `Populism Source`) %>%
  mutate(populism = as.integer(populism))

# Time-varying populism codes. Each rule first sets `baseline` for every
# election of the party and then overwrites it with `switched` for the
# elections whose year satisfies `applies`.
populism_rules <- list(
  # BZO (Austria)
  list(party = 1536, baseline = 1, switched = 0,
       applies = function(yr) yr > 2013),
  # FPO (Austria)
  list(party = 50, baseline = 0, switched = 1,
       applies = function(yr) yr >= 1986),
  # NDSV (Bulgaria)
  list(party = 544, baseline = 0, switched = 1,
       applies = function(yr) yr == 2001),
  # PASOK (Greece)
  list(party = 1338, baseline = 0, switched = 1,
       applies = function(yr) yr <= 1996),
  # DP (Lithuania)
  list(party = 581, baseline = 0, switched = 1,
       applies = function(yr) yr <= 2008),
  # PiS (Poland)
  list(party = 528, baseline = 0, switched = 1,
       applies = function(yr) yr >= 2005),
  # PSD (Romania)
  list(party = 1120, baseline = 0, switched = 1,
       applies = function(yr) yr <= 2000),
  # Smer (Slovakia)
  list(party = 220, baseline = 0, switched = 1,
       applies = function(yr) yr <= 2006),
  # SVP-UDC (Switzerland)
  list(party = 750, baseline = 0, switched = 1,
       applies = function(yr) yr >= 1992)
)

for (rule in populism_rules) {
  party_rows <- p$party_id == rule$party
  # which() drops NA comparisons so they never select a row
  p$populism[which(party_rows)] <- rule$baseline
  p$populism[which(party_rows & rule$applies(p$election_year))] <-
    rule$switched
}
rm(populism_rules, rule, party_rows)

# SP-P (Finland): one English label for all elections of party 200
p$party_name_english[which(p$party_id == 200)] <-
  "Finnish Rural Party / True Finns"

# write.xlsx(p, 'data/partylist_tscs.xlsx')
# write.csv(p, 'data/partylist_tscs.csv', row.names = F)

# ParlGov party data -----------------------------------------------------------

# Party-level attributes: only the id, the ASCII name and the party family
# are needed for the merge.
pp <- read.csv("data/parlgov/view_party.csv", stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  select(party_id, party_name_ascii, family_name, family_id)

# ParlGov election data --------------------------------------------------------

# Election-level parameters, joined later on election_id.
pe <- as_tibble(
  read.csv("data/parlgov/viewcalc_election_parameter.csv",
           stringsAsFactors = FALSE)
)

# CPDS -------------------------------------------------------------------------

# Comparative Political Data Set: keep the identifiers, the election date and
# turnout, plus the macro covariates. openc and unemp get a `_cpds` suffix so
# they can be told apart from the World Bank and AMECO series added later.
c <- as_tibble(read.dta("data/CPDS/CPDS_1960-2014_stata.dta")) %>%
  select(year:emu, elect:vturn, openc, realgdpgr, inflation, unemp) %>%
  rename(openc_cpds = openc,
         unemp_cpds = unemp) %>%
  mutate(elect = as.character(elect),  # character, to match election_date
         election_type = "parliament")

# Inspect the three elections for which ParlGov lacks turnout (output below)
filter(c,
       (country == "Switzerland" & year == 2015) |
         (country == "Slovenia" & year == 1990) |
         (country == "Latvia" & year == 1990))
# # A tibble: 1 × 12
# year  country countryn   iso iso3n  cpds1           poco     eu     emu      elect vturn
# <int>    <chr>    <int> <chr> <int> <fctr>         <fctr> <fctr>  <fctr>      <chr> <dbl>
#   1  1990 Slovenia       31   SVN   705     No Post-communist Non-EU Non-EMU 1990-04-08    77

# World Bank: trade openness ---------------------------------------------------

# World Bank trade series (file NE.TRD.GNFS.ZS): skip the four preamble lines,
# harmonise Slovakia's name, keep the covered countries and reshape the year
# columns 1960-2017 into long format (country_name, year, openc_wb).
trade <-
  read_csv("data/worldbank/API_NE.TRD.GNFS.ZS_DS2_en_csv_v2_9913704.csv",
           skip = 4) %>%
  mutate(`Country Name` =
           recode(`Country Name`, `Slovak Republic` = "Slovakia")) %>%
  filter(`Country Name` %in% europe) %>%
  # the country code is not needed downstream, so it is dropped here already
  select(country_name = `Country Name`, `1960`:`2017`) %>%
  gather(key = year, value = openc_wb, `1960`:`2017`) %>%
  mutate(year = as.integer(year),
         openc_wb = as.numeric(openc_wb))

# Ameco: unemployment ----------------------------------------------------------

# AMECO unemployment series: skip the three header rows, drop the unit
# column, keep the covered countries and reshape the year columns to long
# format (one row per country and year).
ameco <- read_xls("data/ameco/amecoSerieCurrent.xls", skip = 3) %>%
  select(-Unit) %>%
  filter(Country %in% europe) %>%
  gather(key = year, value = unemp_ameco, `2019`:`1960`) %>%
  mutate(year = as.integer(year),
         unemp_ameco = as.numeric(unemp_ameco))

# Idea data --------------------------------------------------------------------

# IDEA voter turnout data (first sheet, header row used for column names).
i <- as_tibble(
  read_xls("data/idea/idea_vt_20180524.xls", sheet = 1, col_names = TRUE)
)

# Column names: lower case, drop the first "- ", strip parentheses and pipes,
# and replace spaces by underscores.
i <- setNames(
  i,
  names(i) %>%
    tolower() %>%
    str_replace("- ", "") %>%
    str_replace_all("[(|)]", "") %>%
    str_replace_all(" ", "_")
)

# Turnout columns come as text with a percent sign; strip it and convert.
i <- i %>%
  mutate_at(vars(voter_turnout, vap_turnout),
            function(x) as.numeric(trimws(str_replace(x, "%", ""))))

# Inspect the three elections for which ParlGov lacks turnout (output below)
filter(i,
       (country == "Switzerland" & year == 2015) |
         (country == "Slovenia" & year == 1990) |
         (country == "Latvia" & year == 1990))
# country election_type year voter_turnout  total_vote registration vap_turnout
# 1      Latvia    parliament 1990        81.2 % 1,600,000.0  1,970,443.0      81.0 %
#   2 Switzerland    parliament 2015        48.4 % 2,563,025.0  5,295,506.0      38.6 %
#   voting_age_population  population invalid_votes compulsory_voting
# 1           1,976,540.0 2,671,000.0          <NA>                No
# 2           6,634,505.0 8,121,830.0          <NA>                No

# Count columns are formatted text: remove commas, spaces and percent signs
# before converting to numeric. All rows are labelled as parliamentary
# elections, and compulsory voting becomes a logical flag.
i <- i %>%
  mutate_at(vars(registration, voting_age_population, population),
            function(x) as.numeric(str_replace_all(x, ",| |%", ""))) %>%
  mutate(election_type = "parliament",
         compulsory_voting = compulsory_voting == "Yes")

# Database of Political Institutions (DPI) database ----------------------------

# DPI: keep the threshold and plurality variables and align three country
# labels with the names used in the ParlGov data.
dpi <- read.dta("data/DPI2015/DPI2015_stata11.dta") %>%
  as_tibble() %>%
  select(countryname, year, thresh, pluralty) %>%
  mutate(countryname = recode(countryname,
                              UK = "United Kingdom",
                              `Czech Rep.` = "Czech Republic",
                              `FRG/Germany` = "Germany"))

# Nils-Christian Borman and Matt Golder's electoral systems data ---------------

# Electoral systems data: trim the country labels, map predecessor states to
# the names used here, and keep non-presidential elections in the covered
# countries. `seats` is renamed so it cannot clash with ParlGov's seats.
es <- read.csv("data/golder/es_data-v3.csv", stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  mutate(country = str_trim(country),
         country = recode(country,
                          "Greek Cyprus" = "Cyprus",
                          "West Germany" = "Germany",
                          "Czechoslovakia" = "Czech Republic")) %>%
  filter(presidential == 0,
         country %in% europe # , year >= 1970
  ) %>%
  rename(seats_es = seats)

# Elections added by hand to the electoral-systems data. All of them get
# legislative_type 2; the average first-tier district magnitude is only
# filled in for Cyprus 2016 and Malta 2013.
es_add <- tibble(
  country = c(rep("Cyprus", 3), "Malta", "Portugal", rep("Slovakia", 2),
              "Slovenia"),
  year = c(1976, 1981, 2016, 2013, 1975, 1990, 1992, 1990),
  legislative_type = 2,
  tier1_avemag = c(NA, NA, 9.33, 5, NA, NA, NA, NA)
)

# One row per country-year, plus the manual rows; negative magnitudes are
# set to missing (presumably missing-value codes -- confirm in the codebook).
es <- es %>%
  distinct(country, year, .keep_all = TRUE) %>%
  bind_rows(es_add) %>%
  arrange(country, year) %>%
  mutate(tier1_avemag = ifelse(tier1_avemag < 0, NA, tier1_avemag))

# PR dummy from legislative_type: type 3 maps to 1, every other type to
# (type - 1), i.e. types 2 and 3 both end up as 1.
# This codes Germany, Bulgaria, Croatia, Greece, Lithuania as PR;
# others need to be corrected.
es$pr <- es$legislative_type - ifelse(es$legislative_type == 3, 2, 1)

# Country-specific corrections
# es$pr[which(es$country == 'Croatia' & elec_year == 1992)] <- 0 # debatable, see: https://en.wikipedia.org/wiki/Croatian_parliamentary_election,_1992
# es$pr[which(es$country == 'Hungary' & elec_year >= 2014)] <- 1 # debatable, see: https://en.wikipedia.org/wiki/Elections_in_Hungary#Changes_in_the_electoral_system_from_2012_onwards
italy_1994_2001 <- es$country == "Italy" & es$year >= 1994 & es$year <= 2001
es$pr[which(italy_1994_2001)] <- 0
# es$pr[which(es$country == 'Lithuania')] <-
romania_2008 <- es$country == "Romania" & es$year == 2008
es$pr[which(romania_2008)] <- 1
rm(italy_1994_2001, romania_2008)

# Kayser & Lindstädt competitiveness measure -----------------------------------

# Loss-probability measures (lpr, lpr2), one row per country code and
# election year; the first row is kept where the key is duplicated.
kl <- read.csv("data/lindstaedt_kayser_replication/TheLossProbVariable.csv",
               stringsAsFactors = FALSE) %>%
  as_tibble() %>%
  select(isocode, elecyr, lpr, lpr2) %>%
  distinct(isocode, elecyr, .keep_all = TRUE)

# Dassonneville et al: Pedersen Index (1950-2014) ------------------------------

# Volatility data: lower-case the column names, align two country labels,
# keep the indices of interest (enep/enpp get a `_d` suffix) and keep one row
# per country and election year.
v <- as_tibble(
  read.csv("data/volatility/PedersenIndex_1950_2014.csv",
           stringsAsFactors = FALSE)
)
v <- setNames(v, tolower(names(v)))

v <- v %>%
  mutate(country = dplyr::recode(country,
                                 `The Netherlands` = "Netherlands",
                                 `West-Germany (BRD)` = "Germany")) %>%
  select(country, electionyear, l_sq,
         enep_d = enep, enpp_d = enpp,
         netvolatility) %>%
  distinct(country, electionyear, .keep_all = TRUE)

# SWIID 6.0: data on inequality ------------------------------------------------

# The .rda file is expected to provide `swiid_summary`, which is kept under
# the short name `s` for the merge below.
load("data/swiid6_0/swiid6_0.rda")

s <- swiid_summary

# Migration data ---------------------------------------------------------------

# Eurostat immigration table (migr_imm8). The first column packs the
# dimensions into one comma-separated string; the remaining columns are years
# (read.delim prefixes them with "X").
m <- read.delim("data/migration/migr_imm8.tsv", stringsAsFactors = FALSE) %>%
  as_tibble()

m <-
  m %>%
  separate(agedef.age.unit.sex.geo.time,
           into = c("agedef", "age", "unit", "sex", "geo", "time"),
           sep = ",") %>%
  gather(key = year, value = migration, starts_with("X")) %>%
  # keep the leading digits only: flags and ":" placeholders become NA
  mutate(migration = as.integer(str_extract(migration, "\\d*")),
         year = as.integer(str_extract(year, "\\d+"))) %>%
  # totals over all ages and both sexes
  filter(agedef == "COMPLET", age == "TOTAL", sex == "T") %>%
  select(-agedef, -unit, -age, -sex, -time)

# The geo column holds Eurostat country codes, which differ from ISO 3166-1
# alpha-2 for Greece ("EL") and the United Kingdom ("UK"). Converting with
# origin "iso2c" left those two countries without an iso3c code, so they never
# matched in the merge; the "eurostat" origin maps them correctly.
m$iso3c <- countrycode(m$geo, "eurostat", destination = "iso3c")

# Rows without a country code (e.g. aggregates) are dropped so that they
# cannot win the de-duplication or match missing keys in the merge.
m <- m %>%
  filter(!is.na(iso3c)) %>%
  select(iso3c, year, migration) %>%
  distinct(iso3c, year, .keep_all = TRUE)

# Age of democracy -------------------------------------------------------------

# Age of democracy: rename the four columns, keep the covered countries and
# derive the year since which each country has been a democracy
# (reference year minus age). Germany is set to 1949 by hand.
a <- read_csv("data/ageofdemocracy/age-of-democracies.csv") %>%
  setNames(c("country_name", "code", "year", "age_of_democracy")) %>%
  filter(country_name %in% europe) %>%
  mutate(age_of_democracy = as.integer(age_of_democracy),
         democracy_since = ifelse(country_name == "Germany",
                                  1949,
                                  year - age_of_democracy)) %>%
  select(country_name, democracy_since)

# Margin between first- and second-placed party (proxy for competitiveness) ----

# Vote-share margin between the largest and the second-largest party of each
# election. The shares are ranked explicitly: nth(vote_share, 2) would return
# the second *row* of the election, which is only the runner-up if the input
# happens to be sorted by vote share. The margin is NA when only one party of
# an election is left in `p`.
comp <-
  p %>%
  group_by(election_id) %>%
  summarise(margin = max(vote_share) -
              sort(vote_share, decreasing = TRUE)[2])

# ------------------------------------------------------------------------------
# What's in the data
# ------------------------------------------------------------------------------

# table(p$country_name)
#
# by(p$p$country_name, range)
#
# table(i$country, i$election_type)
#
#
# data.frame(p_c = c(unique(p$country_name), rep(NA, 14)),
#            p_i = unique(i$country))

# ------------------------------------------------------------------------------
# Merge the datasets (yearly)
# ------------------------------------------------------------------------------

# p <- read.csv('data/partylist_tscs.csv', stringsAsFactors = F)  # read the party data

# Merge p (party election data), pp (party data) and pe (election data) both from ParlGov

# Start from the party-election rows and add party attributes (pp) and
# election parameters (pe).
d <- p %>%
  left_join(pp, by = "party_id") %>%      # merge with pp
  left_join(pe, by = "election_id")       # merge with pe

# table(is.na(d$turnout))
#
# d %>% filter(is.na(turnout)) %>%
#   select(country_name, election_type, election_date, turnout) %>%
#   print(n = nrow(.))
# Switzerland 2015, Latvia 1990 and Slovenia 1990 are missing turnout

# Merge d and c (CPDS) in two steps.
# 1. Turnout (vturn): matched on country, exact election date and election
#    type.
# 2. Other covariates: matched on country and election year. poco is then
#    filled within country (down, then up) so that years outside the CPDS
#    range inherit the country's value, and cee flags post-communist
#    countries.
# Note: d stays grouped by country_name after this step.
d <- d %>%
  left_join(select(c, country, elect, election_type, vturn),
            by = c("country_name" = "country",
                   "election_date" = "elect",
                   "election_type")) %>%
  left_join(select(c, -vturn, -election_type),
            by = c("country_name" = "country",
                   "election_year" = "year")) %>%
  group_by(country_name) %>%
  fill(poco, .direction = "downup") %>%
  mutate(cee = as.integer(poco == "Post-communist"))

# Add, in this order:
# - World Bank trade openness (by country and election year)
# - AMECO unemployment (by country and election year)
# - IDEA turnout data (by country, election year and election type)
d <- d %>%
  left_join(trade, by = c("country_name",
                          "election_year" = "year")) %>%
  left_join(ameco, by = c("country_name" = "Country",
                          "election_year" = "year")) %>%
  left_join(i, by = c("country_name" = "country",
                      "election_year" = "year",
                      "election_type"))

# Switzerland 2015 has no turnout in ParlGov: copy the IDEA figure
# (voter_turnout) into turnout for those rows.
ch_2015 <- which(d$country_name == "Switzerland" & d$election_year == 2015)
d$turnout[ch_2015] <- d$voter_turnout[ch_2015]
rm(ch_2015)

# Remaining sources, all added with left joins so that no party-election row
# is dropped.
d <- d %>%
  # DPI, by country and year
  left_join(dpi, by = c("country_name" = "countryname",
                        "election_year" = "year")) %>%
  # electoral systems data, by country and year
  left_join(es, by = c("country_name" = "country",
                       "election_year" = "year")) %>%
  # Kayser & Lindstädt electoral competitiveness, by country code and year
  left_join(kl, by = c("country_name_short" = "isocode",
                       "election_year" = "elecyr")) %>%
  # volatility data, by country and election year
  left_join(v, by = c("country_name" = "country",
                      "election_year" = "electionyear")) %>%
  # inequality data, by country and year
  left_join(s, by = c("country_name" = "country",
                      "election_year" = "year")) %>%
  # migration data, by country code and year
  left_join(m, by = c("country_name_short" = "iso3c",
                      "election_year" = "year")) %>%
  # age of democracy, by country
  left_join(a, by = "country_name") %>%
  # margin between first- and second-placed party, by election
  left_join(comp, by = "election_id")

# Create openc and unemp variables ---------------------------------------------

# openc: CPDS value where available, otherwise the World Bank value
d$openc <- replace(d$openc_cpds,
                   which(is.na(d$openc_cpds)),
                   d$openc_wb[which(is.na(d$openc_cpds))])

# unemp: CPDS value where available, otherwise the AMECO value
d$unemp <- replace(d$unemp_cpds,
                   which(is.na(d$unemp_cpds)),
                   d$unemp_ameco[which(is.na(d$unemp_cpds))])

# Further party-level variables ------------------------------------------------

# Populist flags per party-election. A party is populist when its populism
# code is at least 1; the ideological variants combine that flag with the
# ParlGov party family, once narrowly (`*wing`) and once broadly.
d <- d %>%
  mutate(
    populist = populism >= 1,
    # narrow definitions: a single party family each
    populist_rightwing = populist & family_name == "Right-wing",
    populist_leftwing = populist & family_name == "Communist/Socialist",
    # broad definitions: two party families each
    populist_right = populist &
      family_name %in% c("Right-wing", "Conservative"),
    populist_left = populist &
      family_name %in% c("Communist/Socialist", "Social democracy")
  )

# new: TRUE for a party's first appearance in the dataset. Rows are sorted by
# country, party and election year; within a party the comparison with the
# previous row is NA only where there is no previous row. The new_populist*
# variables are 0/1 products of `new` and the respective populist flag.
# Note: d stays grouped by country_id and party_id afterwards.
d <- d %>%
  arrange(country_id, party_id, election_year) %>%
  group_by(country_id, party_id) %>%
  mutate(new = is.na(party_id == lag(party_id)),
         new_populist = new * populist,
         new_populist_right = new * populist_right,
         new_populist_left = new * populist_left)
# check with:
# d %>% select(country_name_short, party_name_short, election_year, new) %>%
#   print(n = 200)

# Save the party-level master data: Stata format first, then CSV without
# row names.
write.dta(d, file = "data/master.dta", version = 10)

write.csv(d, file = "data/master.csv", row.names = FALSE)

# ------------------------------------------------------------------------------
# Election-level dataset
# ------------------------------------------------------------------------------

# d <- read.csv('data/master.csv', stringsAsFactors = F)

# One row per election: take the first party row of every election and keep
# only the election- and country-level columns.
e <- d %>%
  ungroup() %>%
  distinct(election_id, .keep_all = TRUE) %>%
  select(
    # ParlGov identifiers and election information
    country_name_short:election_date,
    country_id:previous_cabinet_id,
    election_year,
    # turnout and IDEA variables
    turnout:compulsory_voting,
    # electoral system
    thresh,
    elec_id:pr,
    # competitiveness and volatility
    lpr, lpr2,
    l_sq, enep_d, enpp_d, netvolatility,
    # inequality
    gini_disp, gini_disp_se, gini_mkt, gini_mkt_se,
    abs_red, abs_red_se, rel_red, rel_red_se,
    # remaining covariates
    migration, openc, unemp, margin, democracy_since
  )

# Election-level populism measures, computed from the party rows. For all
# populist parties and for the right/left subsets separately:
#   *party          at least one such party ran
#   *parties        number of such parties
#   *voteshare      their combined vote share
#   *maxvoteshare   largest single vote share among them
#   *seatshare      their combined seat share in percent (two decimals)
#   *presence       seat share above zero
#   populist_new*   at least one / number of first-time populist parties
# Missing populism codes are ignored in all sums (na.rm = TRUE).
tmp <- d %>%
  group_by(election_id) %>%
  summarise(
    # populist
    populistparty = sum(populist, na.rm = TRUE) >= 1,
    populistparties = sum(populist, na.rm = TRUE),
    populistvoteshare = sum(vote_share * populist, na.rm = TRUE),
    populistmaxvoteshare = max(vote_share * populist, na.rm = TRUE),
    populistseatshare =
      round(sum(populist * seats, na.rm = TRUE) /
              unique(seats_total) * 100, 2),
    populistpresence = populistseatshare > 0,
    populist_new = sum(new_populist, na.rm = TRUE) >= 1,
    populist_newparties = sum(new_populist, na.rm = TRUE),
    # populist right
    populistparty_right = sum(populist_right, na.rm = TRUE) >= 1,
    populistparties_right = sum(populist_right, na.rm = TRUE),
    populistvoteshare_right = sum(vote_share * populist_right, na.rm = TRUE),
    populistmaxvoteshare_right =
      max(vote_share * populist_right, na.rm = TRUE),
    populistseatshare_right =
      round(sum(populist_right * seats, na.rm = TRUE) /
              unique(seats_total) * 100, 2),
    populistpresence_right = populistseatshare_right > 0,
    populist_new_right = sum(new_populist_right, na.rm = TRUE) >= 1,
    populist_newrightparties = sum(new_populist_right, na.rm = TRUE),
    # populist left
    populistparty_left = sum(populist_left, na.rm = TRUE) >= 1,
    populistparties_left = sum(populist_left, na.rm = TRUE),
    populistvoteshare_left = sum(vote_share * populist_left, na.rm = TRUE),
    populistmaxvoteshare_left =
      max(vote_share * populist_left, na.rm = TRUE),
    populist_new_left = sum(new_populist_left, na.rm = TRUE) >= 1,
    populist_newleftparties = sum(new_populist_left, na.rm = TRUE),
    populistseatshare_left =
      round(sum(populist_left * seats, na.rm = TRUE) /
              unique(seats_total) * 100, 2),
    populistpresence_left = populistseatshare_left > 0,
    # only left, only right, both (built from the summaries above)
    populistparty_leftonly = populistparty_left & !populistparty_right,
    populistparty_rightonly = !populistparty_left & populistparty_right,
    populistparty_both = populistparty_left & populistparty_right,
    populist_new_leftonly = populist_new_left & !populist_new_right,
    populist_new_rightonly = !populist_new_left & populist_new_right,
    populist_new_both = populist_new_left & populist_new_right,
    # names of populist parties, comma-separated ("" if there are none)
    populistparty_names =
      paste(party_name_english[which(populist)], collapse = ", "),
    populistparty_left_names =
      paste(party_name_english[which(populist_left)], collapse = ", "),
    populistparty_right_names =
      paste(party_name_english[which(populist_right)], collapse = ", ")
  )

# Attach the summaries to the election-level data
e <- e %>% left_join(tmp, by = "election_id")

# populist_legacy: combined populist vote share in the three preceding
# elections of the same country and election type. Missing lags (the first
# elections of a series) count as zero.
# The three lags are added row by row with rowSums(); passing them to sum()
# added up *all* their elements and so gave every election of a country the
# same single total.
# Note: e stays grouped by country_name and election_type afterwards.
e <- e %>%
  arrange(country_name, election_date) %>%
  group_by(country_name, election_type) %>%
  mutate(populist_legacy = rowSums(cbind(lag(populistvoteshare),
                                         lag(populistvoteshare, 2),
                                         lag(populistvoteshare, 3)),
                                   na.rm = TRUE))

# Time variable (count of elections)

# Running number of parliamentary elections per country, in date order.
# The counter is attached by country and election date, so elections of
# another type only receive it when they share a date with a parliamentary
# election.
tmp <- e %>%
  filter(election_type == "parliament") %>%
  arrange(country_id, election_date) %>%
  group_by(country_id) %>%
  mutate(election_count = row_number()) %>%
  select(country_id, election_date, election_count)

e <- e %>% left_join(tmp, by = c("country_id", "election_date"))

# Time variable (age of democracy)
# Years since democratisation at the time of the election; elections held
# before the democracy_since year are set to zero.
e <- e %>%
  mutate(age_of_democracy = pmax(election_year - democracy_since, 0))

# create lags and subset to elections in or after 1970 -------------------------

# First difference within a series: NA for the first element, then x[t] - x[t-1].
first_diff <- function(x) c(NA, diff(x))

# Naming scheme, all computed within country and election type in date order:
#   l_x    value of x at the previous election
#   d_x    change in x since the previous election
#   d_l_x  change in l_x, i.e. the previous election's change in x
# The lags are built on the full series and only then is the data cut to
# 1970 onwards, so the first retained elections keep their lagged values.
# Note: e stays grouped by country_name and election_type afterwards.
e <-
  e %>%
  ungroup() %>%
  arrange(country_name, election_date) %>%
  group_by(country_name, election_type) %>%
  mutate(
    l_turnout = lag(turnout),
    d_turnout = first_diff(turnout),
    # all populist parties
    l_populistparty = lag(populistparty),
    d_populistparty = first_diff(populistparty),
    # a first difference like its _left/_right counterparts; this was
    # lag(l_populistparty), i.e. a second lag rather than a change
    d_l_populistparty = first_diff(l_populistparty),
    l_populistparties = lag(populistparties),
    d_populistparties = first_diff(populistparties),
    l_populistvoteshare = lag(populistvoteshare),
    d_l_populistvoteshare = first_diff(l_populistvoteshare),
    l_populistmaxvoteshare = lag(populistmaxvoteshare),
    l_populistseatshare = lag(populistseatshare),
    d_l_populistseatshare = first_diff(l_populistseatshare),
    l_populistpresence = lag(populistpresence),
    d_l_populistpresence = first_diff(l_populistpresence),
    l_populist_new = lag(populist_new),
    l_populist_newparties = lag(populist_newparties),
    # populist right
    l_populistparty_right = lag(populistparty_right),
    d_populistparty_right = first_diff(populistparty_right),
    d_l_populistparty_right = first_diff(l_populistparty_right),
    l_populistparties_right = lag(populistparties_right),
    d_populistparties_right = first_diff(populistparties_right),
    l_populistvoteshare_right = lag(populistvoteshare_right),
    l_populistseatshare_right = lag(populistseatshare_right),
    d_l_populistvoteshare_right = first_diff(l_populistvoteshare_right),
    d_l_populistseatshare_right = first_diff(l_populistseatshare_right),
    l_populistmaxvoteshare_right = lag(populistmaxvoteshare_right),
    l_populistpresence_right = lag(populistpresence_right),
    d_populistpresence_right = first_diff(populistpresence_right),
    d_l_populistpresence_right = first_diff(l_populistpresence_right),
    l_populist_new_right = lag(populist_new_right),
    l_populist_newrightparties = lag(populist_newrightparties),
    # populist left
    l_populistparty_left = lag(populistparty_left),
    d_populistparty_left = first_diff(populistparty_left),
    d_l_populistparty_left = first_diff(l_populistparty_left),
    l_populistparties_left = lag(populistparties_left),
    d_populistparties_left = first_diff(populistparties_left),
    l_populistvoteshare_left = lag(populistvoteshare_left),
    l_populistseatshare_left = lag(populistseatshare_left),
    d_l_populistvoteshare_left = first_diff(l_populistvoteshare_left),
    d_l_populistseatshare_left = first_diff(l_populistseatshare_left),
    l_populistmaxvoteshare_left = lag(populistmaxvoteshare_left),
    l_populistpresence_left = lag(populistpresence_left),
    d_populistpresence_left = first_diff(populistpresence_left),
    d_l_populistpresence_left = first_diff(l_populistpresence_left),
    l_populist_new_left = lag(populist_new_left),
    l_populist_newleftparties = lag(populist_newleftparties),
    l_populist_legacy = lag(populist_legacy),
    d_populist_new = first_diff(populist_new),
    d_populist_new_left = first_diff(populist_new_left),
    d_populist_new_right = first_diff(populist_new_right),
    # only left, only right, both
    d_populistparty_leftonly = first_diff(populistparty_leftonly),
    d_populistparty_rightonly = first_diff(populistparty_rightonly),
    d_populistparty_both = first_diff(populistparty_both),
    l_populistpresence_leftonly =
      l_populistpresence_left & !l_populistpresence_right,
    d_l_populistpresence_leftonly = first_diff(l_populistpresence_leftonly),
    l_populistpresence_rightonly =
      !l_populistpresence_left & l_populistpresence_right,
    d_l_populistpresence_rightonly = first_diff(l_populistpresence_rightonly),
    l_populistpresence_both =
      l_populistpresence_left & l_populistpresence_right,
    d_l_populistpresence_both = first_diff(l_populistpresence_both),
    d_populist_new_leftonly = first_diff(populist_new_leftonly),
    d_populist_new_rightonly = first_diff(populist_new_rightonly),
    d_populist_new_both = first_diff(populist_new_both),
    # difference control variables
    d_enp_seats = first_diff(enp_seats),
    d_unemp = first_diff(unemp),
    d_openc = first_diff(openc),
    population_mio = population / 1000000,
    d_population_mio = first_diff(population_mio),
    d_compulsory_voting = first_diff(compulsory_voting),
    d_legislative_type = first_diff(legislative_type),
    d_pr = first_diff(pr),
    d_disproportionality = first_diff(disproportionality),
    # logs use x + 1 so that zeros stay defined
    log_tier1_avemag = log(tier1_avemag + 1),
    d_tier1_avemag = first_diff(tier1_avemag),
    d_log_tier1_avemag = first_diff(log_tier1_avemag),
    d_thresh = first_diff(thresh),
    log_margin = log(margin + 1),
    d_margin = first_diff(margin),
    d_log_margin = first_diff(log_margin),
    log_age_of_democracy = log(age_of_democracy + 1),
    d_age_of_democracy = first_diff(age_of_democracy),
    d_log_age_of_democracy = first_diff(log_age_of_democracy)
  ) %>%
  filter(election_year >= 1970)

# create election period variable ----------------------------------------------
# Five-year periods from 1970 to 2020; intervals are closed on the left,
# so e.g. 1975 falls into [1975, 1980).
brks <- seq(1970, 2020, by = 5)

e <- mutate(e,
            election_period = cut(election_year, breaks = brks, right = FALSE))

# write data to disk

#csv
write.csv(e, file = "data/master_elec.csv", row.names = FALSE)

# Stata
# Empty party-name strings are replaced by "." before the Stata export only;
# the CSV above keeps them empty. (Presumably empty strings are a problem in
# the .dta output -- confirm.)
for (names_col in c("populistparty_names",
                    "populistparty_left_names",
                    "populistparty_right_names")) {
  blank_rows <- which(e[[names_col]] == "")
  e[[names_col]][blank_rows] <- "."
}
write.dta(e, file = "data/master_elec.dta")